x86: Make IDT/GDT/LDT updates safe.
authorKeir Fraser <keir.fraser@citrix.com>
Wed, 21 Nov 2007 11:38:51 +0000 (11:38 +0000)
committerKeir Fraser <keir.fraser@citrix.com>
Wed, 21 Nov 2007 11:38:51 +0000 (11:38 +0000)
This involves either determining that the entry will not be
read/written while the update takes place, or atomically making the
entry 'present', or doing the entire write atomically, as appropriate.

This issue raised, and original patch provided, by Jan Beulich.

Signed-off-by: Keir Fraser <keir.fraser@eu.citrix.com>
xen/arch/x86/mm.c
xen/arch/x86/traps.c
xen/arch/x86/x86_32/seg_fixup.c
xen/include/asm-x86/desc.h
xen/include/asm-x86/system.h

index 9a428b19a682b734086d95d2219fc33777aa2900..2ec351f6179df9a399284ea704c24905a04b8e41 100644 (file)
@@ -3007,7 +3007,8 @@ long set_gdt(struct vcpu *v,
         return -EINVAL;
 
     /* Check the pages in the new GDT. */
-    for ( i = 0; i < nr_pages; i++ ) {
+    for ( i = 0; i < nr_pages; i++ )
+    {
         mfn = frames[i] = gmfn_to_mfn(d, frames[i]);
         if ( !mfn_valid(mfn) ||
              !get_page_and_type(mfn_to_page(mfn), d, PGT_gdt_page) )
@@ -3073,23 +3074,15 @@ long do_update_descriptor(u64 pa, u64 desc)
 
     *(u64 *)&d = desc;
 
-    LOCK_BIGLOCK(dom);
-
     mfn = gmfn_to_mfn(dom, gmfn);
     if ( (((unsigned int)pa % sizeof(struct desc_struct)) != 0) ||
          !mfn_valid(mfn) ||
          !check_descriptor(dom, &d) )
-    {
-        UNLOCK_BIGLOCK(dom);
         return -EINVAL;
-    }
 
     page = mfn_to_page(mfn);
     if ( unlikely(!get_page(page, dom)) )
-    {
-        UNLOCK_BIGLOCK(dom);
         return -EINVAL;
-    }
 
     /* Check if the given frame is in use in an unsafe context. */
     switch ( page->u.inuse.type_info & PGT_type_mask )
@@ -3112,7 +3105,7 @@ long do_update_descriptor(u64 pa, u64 desc)
 
     /* All is good so make the update. */
     gdt_pent = map_domain_page(mfn);
-    memcpy(&gdt_pent[offset], &d, 8);
+    atomic_write64((uint64_t *)&gdt_pent[offset], *(uint64_t *)&d);
     unmap_domain_page(gdt_pent);
 
     put_page_type(page);
@@ -3122,8 +3115,6 @@ long do_update_descriptor(u64 pa, u64 desc)
  out:
     put_page(page);
 
-    UNLOCK_BIGLOCK(dom);
-
     return ret;
 }
 
index d8a1ec847d6cbb3409504c5b237db95b2bbd760b..b00400e01590b81d352ca4ac5e8252a6ea837ccd 100644 (file)
@@ -2583,7 +2583,10 @@ void set_system_gate(unsigned int n, void *addr)
 
 void set_task_gate(unsigned int n, unsigned int sel)
 {
+    idt_table[n].b = 0;
+    wmb(); /* disable gate /then/ rewrite */
     idt_table[n].a = sel << 16;
+    wmb(); /* rewrite /then/ enable gate */
     idt_table[n].b = 0x8500;
 }
 
index b6ada7a54483705f3b04b826c730805454d125a3..2dfe940d19f5d867fa65d00fd22547b15322ffb2 100644 (file)
@@ -153,7 +153,7 @@ static unsigned char twobyte_decode[256] = {
  *  @base  (OUT): Decoded linear base address.
  *  @limit (OUT): Decoded segment limit, in bytes. 0 == unlimited (4GB).
  */
-int get_baselimit(u16 seg, unsigned long *base, unsigned long *limit)
+static int get_baselimit(u16 seg, unsigned long *base, unsigned long *limit)
 {
     struct vcpu *d = current;
     unsigned long *table, a, b;
@@ -204,7 +204,7 @@ int get_baselimit(u16 seg, unsigned long *base, unsigned long *limit)
 }
 
 /* Turn a segment+offset into a linear address. */
-int linearise_address(u16 seg, unsigned long off, unsigned long *linear)
+static int linearise_address(u16 seg, unsigned long off, unsigned long *linear)
 {
     unsigned long base, limit;
 
@@ -216,10 +216,14 @@ int linearise_address(u16 seg, unsigned long off, unsigned long *linear)
 
     *linear = base + off;
 
+    /* Conservatively check 32 bytes from returned linear base. */
+    if ( !access_ok(linear, 32) )
+        return 0;
+
     return 1;
 }
 
-int fixup_seg(u16 seg, unsigned long offset)
+static int fixup_seg(u16 seg, unsigned long offset)
 {
     struct vcpu *d = current;
     unsigned long *table, a, b, base, limit;
@@ -303,9 +307,8 @@ int fixup_seg(u16 seg, unsigned long offset)
     a &= ~0x0ffff; a |= limit & 0x0ffff;
     b &= ~0xf0000; b |= limit & 0xf0000;
     b ^= _SEGMENT_EC; /* grows-up <-> grows-down */
-    /* NB. These can't fault. Checked readable above; must also be writable. */
-    table[2*idx+0] = a;
-    table[2*idx+1] = b;
+    /* NB. This can't fault. Checked readable above; must also be writable. */
+    atomic_write64((uint64_t *)&table[2*idx], ((uint64_t)b<<32) | a);
     return 1;
 }
 
index 8817286f60ac185861c43c6314b44d58feaa33e0..2893b0b8d0163ceb36c1ac3e1b75001dd9145007 100644 (file)
@@ -143,6 +143,11 @@ typedef struct {
 
 #define _set_gate(gate_addr,type,dpl,addr)               \
 do {                                                     \
+    (gate_addr)->a = 0;                                  \
+    wmb(); /* disable gate /then/ rewrite */             \
+    (gate_addr)->b =                                     \
+        ((unsigned long)(addr) >> 32);                   \
+    wmb(); /* rewrite /then/ enable gate */              \
     (gate_addr)->a =                                     \
         (((unsigned long)(addr) & 0xFFFF0000UL) << 32) | \
         ((unsigned long)(dpl) << 45) |                   \
@@ -150,49 +155,53 @@ do {                                                     \
         ((unsigned long)(addr) & 0xFFFFUL) |             \
         ((unsigned long)__HYPERVISOR_CS64 << 16) |       \
         (1UL << 47);                                     \
-    (gate_addr)->b =                                     \
-        ((unsigned long)(addr) >> 32);                   \
 } while (0)
 
 #define _set_tssldt_desc(desc,addr,limit,type)           \
 do {                                                     \
+    (desc)[0].b = (desc)[1].b = 0;                       \
+    wmb(); /* disable entry /then/ rewrite */            \
     (desc)[0].a =                                        \
         ((u32)(addr) << 16) | ((u32)(limit) & 0xFFFF);   \
+    (desc)[1].a = (u32)(((unsigned long)(addr)) >> 32);  \
+    wmb(); /* rewrite /then/ enable entry */             \
     (desc)[0].b =                                        \
         ((u32)(addr) & 0xFF000000U) |                    \
         ((u32)(type) << 8) | 0x8000U |                   \
         (((u32)(addr) & 0x00FF0000U) >> 16);             \
-    (desc)[1].a = (u32)(((unsigned long)(addr)) >> 32);  \
-    (desc)[1].b = 0;                                     \
 } while (0)
 
 #elif defined(__i386__)
 
 typedef struct desc_struct idt_entry_t;
 
-#define _set_gate(gate_addr,type,dpl,addr) \
-do { \
-  int __d0, __d1; \
-  __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
- "movw %4,%%dx\n\t" \
- "movl %%eax,%0\n\t" \
- "movl %%edx,%1" \
- :"=m" (*((long *) (gate_addr))), \
-  "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
- :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
-  "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \
+#define _set_gate(gate_addr,type,dpl,addr)               \
+do {                                                     \
+    (gate_addr)->b = 0;                                  \
+    wmb(); /* disable gate /then/ rewrite */             \
+    (gate_addr)->a =                                     \
+        ((unsigned long)(addr) & 0xFFFFUL) |             \
+        ((unsigned long)__HYPERVISOR_CS << 16);          \
+    wmb(); /* rewrite /then/ enable gate */              \
+    (gate_addr)->b =                                     \
+        ((unsigned long)(addr) & 0xFFFF0000UL) |         \
+        ((unsigned long)(dpl) << 13) |                   \
+        ((unsigned long)(type) << 8) |                   \
+        (1UL << 15);                                     \
 } while (0)
 
-#define _set_tssldt_desc(n,addr,limit,type) \
-__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
- "movw %%ax,2(%2)\n\t" \
- "rorl $16,%%eax\n\t" \
- "movb %%al,4(%2)\n\t" \
- "movb %4,5(%2)\n\t" \
- "movb $0,6(%2)\n\t" \
- "movb %%ah,7(%2)\n\t" \
- "rorl $16,%%eax" \
- : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type|0x80))
+#define _set_tssldt_desc(desc,addr,limit,type)           \
+do {                                                     \
+    (desc)->b = 0;                                       \
+    wmb(); /* disable entry /then/ rewrite */            \
+    (desc)->a =                                          \
+        ((u32)(addr) << 16) | ((u32)(limit) & 0xFFFF);   \
+    wmb(); /* rewrite /then/ enable entry */             \
+    (desc)->b =                                          \
+        ((u32)(addr) & 0xFF000000U) |                    \
+        ((u32)(type) << 8) | 0x8000U |                   \
+        (((u32)(addr) & 0x00FF0000U) >> 16);             \
+} while (0)
 
 #endif
 
index c464e1d454e7fc79fcac0fdeff6f8a7c304d6382..7d000522e76d6a508cc9bd3d4ceb0adff451aed5 100644 (file)
@@ -256,6 +256,17 @@ static always_inline unsigned long long __cmpxchg8b(
 })
 #endif
 
+static inline void atomic_write64(uint64_t *p, uint64_t v)
+{
+#ifdef __i386__
+    uint64_t w = *p, x;
+    while ( (x = __cmpxchg8b(p, w, v)) != w )
+        w = x;
+#else
+    *p = v;
+#endif
+}
+
 #if defined(__i386__)
 #define mb()   __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
 #define rmb()  __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")